import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from scipy.stats import zscore
import seaborn as sns
from sklearn.cluster import AgglomerativeClustering
veh = pd.read_csv(r'C:\Users\Akash Barwad\Documents\AIML\documents\Project Data\Project-5 - Unsupervised Learning\vehicle-1.csv')
vehtry = pd.read_csv(r'C:\Users\Akash Barwad\Documents\AIML\documents\Project Data\Project-5 - Unsupervised Learning\vehicle-1.csv')
veh.head(30)
| compactness | circularity | distance_circularity | radius_ratio | pr.axis_aspect_ratio | max.length_aspect_ratio | scatter_ratio | elongatedness | pr.axis_rectangularity | max.length_rectangularity | scaled_variance | scaled_variance.1 | scaled_radius_of_gyration | scaled_radius_of_gyration.1 | skewness_about | skewness_about.1 | skewness_about.2 | hollows_ratio | class | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 95.0 | 48.0 | 83.0 | 178.0 | 72.0 | 10.0 | 162.0 | 42.0 | 20.0 | 159.0 | 176.0 | 379.0 | 184.0 | 70.0 | 6.0 | 16.0 | 187.0 | 197.0 | van |
| 1 | 91.0 | 41.0 | 84.0 | 141.0 | 57.0 | 9.0 | 149.0 | 45.0 | 19.0 | 143.0 | 170.0 | 330.0 | 158.0 | 72.0 | 9.0 | 14.0 | 189.0 | 199.0 | van |
| 2 | 104.0 | 50.0 | 106.0 | 209.0 | 66.0 | 10.0 | 207.0 | 32.0 | 23.0 | 158.0 | 223.0 | 635.0 | 220.0 | 73.0 | 14.0 | 9.0 | 188.0 | 196.0 | car |
| 3 | 93.0 | 41.0 | 82.0 | 159.0 | 63.0 | 9.0 | 144.0 | 46.0 | 19.0 | 143.0 | 160.0 | 309.0 | 127.0 | 63.0 | 6.0 | 10.0 | 199.0 | 207.0 | van |
| 4 | 85.0 | 44.0 | 70.0 | 205.0 | 103.0 | 52.0 | 149.0 | 45.0 | 19.0 | 144.0 | 241.0 | 325.0 | 188.0 | 127.0 | 9.0 | 11.0 | 180.0 | 183.0 | bus |
| 5 | 107.0 | NaN | 106.0 | 172.0 | 50.0 | 6.0 | 255.0 | 26.0 | 28.0 | 169.0 | 280.0 | 957.0 | 264.0 | 85.0 | 5.0 | 9.0 | 181.0 | 183.0 | bus |
| 6 | 97.0 | 43.0 | 73.0 | 173.0 | 65.0 | 6.0 | 153.0 | 42.0 | 19.0 | 143.0 | 176.0 | 361.0 | 172.0 | 66.0 | 13.0 | 1.0 | 200.0 | 204.0 | bus |
| 7 | 90.0 | 43.0 | 66.0 | 157.0 | 65.0 | 9.0 | 137.0 | 48.0 | 18.0 | 146.0 | 162.0 | 281.0 | 164.0 | 67.0 | 3.0 | 3.0 | 193.0 | 202.0 | van |
| 8 | 86.0 | 34.0 | 62.0 | 140.0 | 61.0 | 7.0 | 122.0 | 54.0 | 17.0 | 127.0 | 141.0 | 223.0 | 112.0 | 64.0 | 2.0 | 14.0 | 200.0 | 208.0 | van |
| 9 | 93.0 | 44.0 | 98.0 | NaN | 62.0 | 11.0 | 183.0 | 36.0 | 22.0 | 146.0 | 202.0 | 505.0 | 152.0 | 64.0 | 4.0 | 14.0 | 195.0 | 204.0 | car |
| 10 | 86.0 | 36.0 | 70.0 | 143.0 | 61.0 | 9.0 | 133.0 | 50.0 | 18.0 | 130.0 | 153.0 | 266.0 | 127.0 | 66.0 | 2.0 | 10.0 | 194.0 | 202.0 | van |
| 11 | 90.0 | 34.0 | 66.0 | 136.0 | 55.0 | 6.0 | 123.0 | 54.0 | 17.0 | 118.0 | 148.0 | 224.0 | 118.0 | 65.0 | 5.0 | 26.0 | 196.0 | 202.0 | car |
| 12 | 88.0 | 46.0 | 74.0 | 171.0 | 68.0 | 6.0 | 152.0 | 43.0 | 19.0 | 148.0 | 180.0 | 349.0 | 192.0 | 71.0 | 5.0 | 11.0 | 189.0 | 195.0 | bus |
| 13 | 89.0 | 42.0 | 85.0 | 144.0 | 58.0 | 10.0 | 152.0 | 44.0 | 19.0 | 144.0 | 173.0 | 345.0 | 161.0 | 72.0 | 8.0 | 13.0 | 187.0 | 197.0 | van |
| 14 | 94.0 | 49.0 | 79.0 | 203.0 | 71.0 | 5.0 | 174.0 | 37.0 | 21.0 | 154.0 | 196.0 | 465.0 | 206.0 | 71.0 | 6.0 | 2.0 | 197.0 | 199.0 | bus |
| 15 | 96.0 | 55.0 | 103.0 | 201.0 | 65.0 | 9.0 | 204.0 | 32.0 | 23.0 | 166.0 | 227.0 | 624.0 | 246.0 | 74.0 | 6.0 | 2.0 | 186.0 | 194.0 | car |
| 16 | 89.0 | 36.0 | 51.0 | 109.0 | 52.0 | 6.0 | 118.0 | 57.0 | 17.0 | 129.0 | 137.0 | 206.0 | 125.0 | 80.0 | 2.0 | 14.0 | 181.0 | 185.0 | van |
| 17 | 99.0 | 41.0 | 77.0 | 197.0 | 69.0 | 6.0 | 177.0 | 36.0 | 21.0 | 139.0 | 202.0 | 485.0 | 151.0 | 72.0 | 4.0 | 10.0 | 198.0 | 199.0 | bus |
| 18 | 104.0 | 54.0 | 100.0 | 186.0 | 61.0 | 10.0 | 216.0 | 31.0 | 24.0 | 173.0 | 225.0 | 686.0 | 220.0 | 74.0 | 5.0 | 11.0 | 185.0 | 195.0 | car |
| 19 | 101.0 | 56.0 | 100.0 | 215.0 | NaN | 10.0 | 208.0 | 32.0 | 24.0 | 169.0 | 227.0 | 651.0 | 223.0 | 74.0 | 6.0 | 5.0 | 186.0 | 193.0 | car |
| 20 | 84.0 | 47.0 | 75.0 | 153.0 | 64.0 | 6.0 | 154.0 | 43.0 | 19.0 | 145.0 | 175.0 | 354.0 | 184.0 | 75.0 | 0.0 | 3.0 | 185.0 | 192.0 | bus |
| 21 | 84.0 | 37.0 | 53.0 | 121.0 | 59.0 | 5.0 | 123.0 | 55.0 | 17.0 | 125.0 | 141.0 | 221.0 | 133.0 | 82.0 | 7.0 | 1.0 | 179.0 | 183.0 | van |
| 22 | 94.0 | 43.0 | 64.0 | 173.0 | 69.0 | 7.0 | 150.0 | 43.0 | 19.0 | 142.0 | 169.0 | 344.0 | 177.0 | 68.0 | 9.0 | 1.0 | 199.0 | 206.0 | bus |
| 23 | 87.0 | 39.0 | 70.0 | 148.0 | 61.0 | 7.0 | 143.0 | 46.0 | 18.0 | 136.0 | 164.0 | 307.0 | 141.0 | 69.0 | 1.0 | 2.0 | 192.0 | 199.0 | bus |
| 24 | 99.0 | 53.0 | 105.0 | 219.0 | 66.0 | 11.0 | 204.0 | 32.0 | 23.0 | 165.0 | 221.0 | 623.0 | 224.0 | 68.0 | 0.0 | 6.0 | 191.0 | 201.0 | car |
| 25 | 85.0 | 45.0 | 80.0 | 154.0 | 64.0 | 9.0 | 147.0 | 45.0 | 19.0 | 148.0 | 169.0 | 324.0 | 174.0 | 71.0 | 1.0 | 4.0 | 188.0 | 199.0 | van |
| 26 | 83.0 | 36.0 | 54.0 | 119.0 | 57.0 | 6.0 | 128.0 | 53.0 | 18.0 | 125.0 | 143.0 | 238.0 | 139.0 | 82.0 | 6.0 | 3.0 | 179.0 | 183.0 | car |
| 27 | 107.0 | 54.0 | 98.0 | 203.0 | 65.0 | 11.0 | 218.0 | 31.0 | 25.0 | 167.0 | 229.0 | 696.0 | 216.0 | 72.0 | 1.0 | 28.0 | 187.0 | 199.0 | car |
| 28 | 102.0 | 45.0 | 85.0 | 193.0 | 64.0 | 6.0 | 192.0 | 33.0 | 22.0 | 146.0 | 217.0 | 570.0 | 163.0 | 76.0 | 6.0 | 7.0 | 195.0 | 193.0 | bus |
| 29 | 80.0 | 38.0 | 63.0 | 129.0 | 55.0 | 7.0 | 146.0 | 46.0 | 19.0 | 130.0 | 168.0 | 314.0 | 158.0 | 83.0 | 9.0 | 20.0 | 180.0 | 185.0 | car |
veh.columns
Index(['compactness', 'circularity', 'distance_circularity', 'radius_ratio',
'pr.axis_aspect_ratio', 'max.length_aspect_ratio', 'scatter_ratio',
'elongatedness', 'pr.axis_rectangularity', 'max.length_rectangularity',
'scaled_variance', 'scaled_variance.1', 'scaled_radius_of_gyration',
'scaled_radius_of_gyration.1', 'skewness_about', 'skewness_about.1',
'skewness_about.2', 'hollows_ratio', 'class'],
dtype='object')
veh.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 847 entries, 0 to 846 Data columns (total 19 columns): compactness 846 non-null float64 circularity 841 non-null float64 distance_circularity 842 non-null float64 radius_ratio 840 non-null float64 pr.axis_aspect_ratio 844 non-null float64 max.length_aspect_ratio 846 non-null float64 scatter_ratio 845 non-null float64 elongatedness 845 non-null float64 pr.axis_rectangularity 843 non-null float64 max.length_rectangularity 846 non-null float64 scaled_variance 843 non-null float64 scaled_variance.1 844 non-null float64 scaled_radius_of_gyration 844 non-null float64 scaled_radius_of_gyration.1 842 non-null float64 skewness_about 840 non-null float64 skewness_about.1 845 non-null float64 skewness_about.2 845 non-null float64 hollows_ratio 846 non-null float64 class 846 non-null object dtypes: float64(18), object(1) memory usage: 125.8+ KB
for feature in veh.columns: # Loop through all columns in the dataframe
if veh[feature].dtype == 'object': # Only apply for columns with categorical strings
veh[feature] = pd.Categorical(veh[feature]).codes # Replace strings with an integer
veh.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 847 entries, 0 to 846 Data columns (total 19 columns): compactness 846 non-null float64 circularity 841 non-null float64 distance_circularity 842 non-null float64 radius_ratio 840 non-null float64 pr.axis_aspect_ratio 844 non-null float64 max.length_aspect_ratio 846 non-null float64 scatter_ratio 845 non-null float64 elongatedness 845 non-null float64 pr.axis_rectangularity 843 non-null float64 max.length_rectangularity 846 non-null float64 scaled_variance 843 non-null float64 scaled_variance.1 844 non-null float64 scaled_radius_of_gyration 844 non-null float64 scaled_radius_of_gyration.1 842 non-null float64 skewness_about 840 non-null float64 skewness_about.1 845 non-null float64 skewness_about.2 845 non-null float64 hollows_ratio 846 non-null float64 class 847 non-null int8 dtypes: float64(18), int8(1) memory usage: 120.0 KB
veh.isnull().values.any()
True
veh.describe().T
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| compactness | 846.0 | 93.678487 | 8.234474 | 73.0 | 87.00 | 93.0 | 100.0 | 119.0 |
| circularity | 841.0 | 44.828775 | 6.152172 | 33.0 | 40.00 | 44.0 | 49.0 | 59.0 |
| distance_circularity | 842.0 | 82.110451 | 15.778292 | 40.0 | 70.00 | 80.0 | 98.0 | 112.0 |
| radius_ratio | 840.0 | 168.888095 | 33.520198 | 104.0 | 141.00 | 167.0 | 195.0 | 333.0 |
| pr.axis_aspect_ratio | 844.0 | 61.678910 | 7.891463 | 47.0 | 57.00 | 61.0 | 65.0 | 138.0 |
| max.length_aspect_ratio | 846.0 | 8.567376 | 4.601217 | 2.0 | 7.00 | 8.0 | 10.0 | 55.0 |
| scatter_ratio | 845.0 | 168.901775 | 33.214848 | 112.0 | 147.00 | 157.0 | 198.0 | 265.0 |
| elongatedness | 845.0 | 40.933728 | 7.816186 | 26.0 | 33.00 | 43.0 | 46.0 | 61.0 |
| pr.axis_rectangularity | 843.0 | 20.582444 | 2.592933 | 17.0 | 19.00 | 20.0 | 23.0 | 29.0 |
| max.length_rectangularity | 846.0 | 147.998818 | 14.515652 | 118.0 | 137.00 | 146.0 | 159.0 | 188.0 |
| scaled_variance | 843.0 | 188.631079 | 31.411004 | 130.0 | 167.00 | 179.0 | 217.0 | 320.0 |
| scaled_variance.1 | 844.0 | 439.494076 | 176.666903 | 184.0 | 318.00 | 363.5 | 587.0 | 1018.0 |
| scaled_radius_of_gyration | 844.0 | 174.709716 | 32.584808 | 109.0 | 149.00 | 173.5 | 198.0 | 268.0 |
| scaled_radius_of_gyration.1 | 842.0 | 72.447743 | 7.486190 | 59.0 | 67.00 | 71.5 | 75.0 | 135.0 |
| skewness_about | 840.0 | 6.364286 | 4.920649 | 0.0 | 2.00 | 6.0 | 9.0 | 22.0 |
| skewness_about.1 | 845.0 | 12.602367 | 8.936081 | 0.0 | 5.00 | 11.0 | 19.0 | 41.0 |
| skewness_about.2 | 845.0 | 188.919527 | 6.155809 | 176.0 | 184.00 | 188.0 | 193.0 | 206.0 |
| hollows_ratio | 846.0 | 195.632388 | 7.438797 | 181.0 | 190.25 | 197.0 | 201.0 | 211.0 |
veh.shape
(847, 19)
veh.size
16093
sns.pairplot(veh)
C:\Users\Akash Barwad\Anaconda3\lib\site-packages\numpy\lib\histograms.py:824: RuntimeWarning: invalid value encountered in greater_equal keep = (tmp_a >= first_edge) C:\Users\Akash Barwad\Anaconda3\lib\site-packages\numpy\lib\histograms.py:825: RuntimeWarning: invalid value encountered in less_equal keep &= (tmp_a <= last_edge)
<seaborn.axisgrid.PairGrid at 0x1e01782ed30>
veh.columns
Index(['compactness', 'circularity', 'distance_circularity', 'radius_ratio',
'pr.axis_aspect_ratio', 'max.length_aspect_ratio', 'scatter_ratio',
'elongatedness', 'pr.axis_rectangularity', 'max.length_rectangularity',
'scaled_variance', 'scaled_variance.1', 'scaled_radius_of_gyration',
'scaled_radius_of_gyration.1', 'skewness_about', 'skewness_about.1',
'skewness_about.2', 'hollows_ratio', 'class'],
dtype='object')
sns.set(style="whitegrid")
ax = sns.boxplot(x=veh['radius_ratio'],whis=1.5)
sns.set(style="whitegrid")
ax = sns.boxplot(x=veh['pr.axis_aspect_ratio'],whis=1.5)
sns.set(style="whitegrid")
ax = sns.boxplot(x=veh['max.length_aspect_ratio'],whis=1.5)
sns.set(style="whitegrid")
ax = sns.boxplot(x=veh['scaled_variance'],whis=1.5)
sns.set(style="whitegrid")
ax = sns.boxplot(x=veh['scaled_variance.1'],whis=1.5)
sns.set(style="whitegrid")
ax = sns.boxplot(x=veh['scaled_radius_of_gyration.1'],whis=1.5)
sns.set(style="whitegrid")
ax = sns.boxplot(x=veh['skewness_about'],whis=1.5)
veh.mean()
compactness 93.678487 circularity 44.828775 distance_circularity 82.110451 radius_ratio 168.888095 pr.axis_aspect_ratio 61.678910 max.length_aspect_ratio 8.567376 scatter_ratio 168.901775 elongatedness 40.933728 pr.axis_rectangularity 20.582444 max.length_rectangularity 147.998818 scaled_variance 188.631079 scaled_variance.1 439.494076 scaled_radius_of_gyration 174.709716 scaled_radius_of_gyration.1 72.447743 skewness_about 6.364286 skewness_about.1 12.602367 skewness_about.2 188.919527 hollows_ratio 195.632388 dtype: float64
cor = veh[['compactness', 'circularity', 'distance_circularity', 'radius_ratio',
'pr.axis_aspect_ratio', 'max.length_aspect_ratio', 'scatter_ratio',
'elongatedness', 'pr.axis_rectangularity', 'max.length_rectangularity',
'scaled_variance', 'scaled_variance.1', 'scaled_radius_of_gyration',
'scaled_radius_of_gyration.1', 'skewness_about', 'skewness_about.1',
'skewness_about.2', 'hollows_ratio', 'class']].corr()
cor
| compactness | circularity | distance_circularity | radius_ratio | pr.axis_aspect_ratio | max.length_aspect_ratio | scatter_ratio | elongatedness | pr.axis_rectangularity | max.length_rectangularity | scaled_variance | scaled_variance.1 | scaled_radius_of_gyration | scaled_radius_of_gyration.1 | skewness_about | skewness_about.1 | skewness_about.2 | hollows_ratio | class | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| compactness | 1.000000 | 0.689786 | 0.791707 | 0.691081 | 0.091779 | 0.148249 | 0.812770 | -0.788736 | 0.814248 | 0.676143 | 0.764361 | 0.818674 | 0.585845 | -0.250603 | 0.236685 | 0.157670 | 0.298528 | 0.365552 | -0.033796 |
| circularity | 0.689786 | 1.000000 | 0.797180 | 0.625051 | 0.154283 | 0.251407 | 0.858265 | -0.827246 | 0.856603 | 0.965729 | 0.806791 | 0.850863 | 0.935950 | 0.053080 | 0.144968 | -0.011869 | -0.106339 | 0.045652 | -0.160546 |
| distance_circularity | 0.791707 | 0.797180 | 1.000000 | 0.771748 | 0.158684 | 0.264621 | 0.907949 | -0.913020 | 0.896273 | 0.775149 | 0.865710 | 0.890541 | 0.706950 | -0.227001 | 0.114665 | 0.266049 | 0.146027 | 0.333648 | -0.065209 |
| radius_ratio | 0.691081 | 0.625051 | 0.771748 | 1.000000 | 0.665363 | 0.450486 | 0.738480 | -0.792946 | 0.712744 | 0.571083 | 0.798294 | 0.725598 | 0.541325 | -0.181520 | 0.049112 | 0.174469 | 0.382912 | 0.472339 | -0.182921 |
| pr.axis_aspect_ratio | 0.091779 | 0.154283 | 0.158684 | 0.665363 | 1.000000 | 0.648861 | 0.103832 | -0.183492 | 0.079566 | 0.127322 | 0.273738 | 0.089750 | 0.122454 | 0.152860 | -0.058539 | -0.032180 | 0.240201 | 0.267760 | -0.098431 |
| max.length_aspect_ratio | 0.148249 | 0.251407 | 0.264621 | 0.450486 | 0.648861 | 1.000000 | 0.165998 | -0.180053 | 0.161603 | 0.305943 | 0.319033 | 0.143745 | 0.189752 | 0.295638 | 0.015446 | 0.043491 | -0.026184 | 0.143919 | 0.207619 |
| scatter_ratio | 0.812770 | 0.858265 | 0.907949 | 0.738480 | 0.103832 | 0.165998 | 1.000000 | -0.973504 | 0.992078 | 0.810017 | 0.951672 | 0.996328 | 0.800577 | -0.028006 | 0.074376 | 0.213512 | 0.005171 | 0.118504 | -0.288904 |
| elongatedness | -0.788736 | -0.827246 | -0.913020 | -0.792946 | -0.183492 | -0.180053 | -0.973504 | 1.000000 | -0.950405 | -0.776150 | -0.938313 | -0.956488 | -0.766671 | 0.103535 | -0.052243 | -0.186027 | -0.114846 | -0.216769 | 0.339348 |
| pr.axis_rectangularity | 0.814248 | 0.856603 | 0.896273 | 0.712744 | 0.079566 | 0.161603 | 0.992078 | -0.950405 | 1.000000 | 0.813135 | 0.938182 | 0.992316 | 0.798522 | -0.015711 | 0.083219 | 0.215200 | -0.019066 | 0.099481 | -0.259102 |
| max.length_rectangularity | 0.676143 | 0.965729 | 0.775149 | 0.571083 | 0.127322 | 0.305943 | 0.810017 | -0.776150 | 0.813135 | 1.000000 | 0.746657 | 0.797485 | 0.866554 | 0.041283 | 0.136077 | 0.001660 | -0.104437 | 0.076770 | -0.032399 |
| scaled_variance | 0.764361 | 0.806791 | 0.865710 | 0.798294 | 0.273738 | 0.319033 | 0.951672 | -0.938313 | 0.938182 | 0.746657 | 1.000000 | 0.949766 | 0.781016 | 0.112452 | 0.036165 | 0.196202 | 0.014434 | 0.086708 | -0.312836 |
| scaled_variance.1 | 0.818674 | 0.850863 | 0.890541 | 0.725598 | 0.089750 | 0.143745 | 0.996328 | -0.956488 | 0.992316 | 0.797485 | 0.949766 | 1.000000 | 0.797318 | -0.016642 | 0.077288 | 0.202398 | 0.006648 | 0.103839 | -0.288146 |
| scaled_radius_of_gyration | 0.585845 | 0.935950 | 0.706950 | 0.541325 | 0.122454 | 0.189752 | 0.800577 | -0.766671 | 0.798522 | 0.866554 | 0.781016 | 0.797318 | 1.000000 | 0.192245 | 0.166785 | -0.056067 | -0.225882 | -0.118597 | -0.250967 |
| scaled_radius_of_gyration.1 | -0.250603 | 0.053080 | -0.227001 | -0.181520 | 0.152860 | 0.295638 | -0.028006 | 0.103535 | -0.015711 | 0.041283 | 0.112452 | -0.016642 | 0.192245 | 1.000000 | -0.088736 | -0.126686 | -0.752437 | -0.804793 | -0.213049 |
| skewness_about | 0.236685 | 0.144968 | 0.114665 | 0.049112 | -0.058539 | 0.015446 | 0.074376 | -0.052243 | 0.083219 | 0.136077 | 0.036165 | 0.077288 | 0.166785 | -0.088736 | 1.000000 | -0.035154 | 0.115728 | 0.097293 | 0.119652 |
| skewness_about.1 | 0.157670 | -0.011869 | 0.266049 | 0.174469 | -0.032180 | 0.043491 | 0.213512 | -0.186027 | 0.215200 | 0.001660 | 0.196202 | 0.202398 | -0.056067 | -0.126686 | -0.035154 | 1.000000 | 0.077460 | 0.205115 | -0.010674 |
| skewness_about.2 | 0.298528 | -0.106339 | 0.146027 | 0.382912 | 0.240201 | -0.026184 | 0.005171 | -0.114846 | -0.019066 | -0.104437 | 0.014434 | 0.006648 | -0.225882 | -0.752437 | 0.115728 | 0.077460 | 1.000000 | 0.893869 | 0.067251 |
| hollows_ratio | 0.365552 | 0.045652 | 0.333648 | 0.472339 | 0.267760 | 0.143919 | 0.118504 | -0.216769 | 0.099481 | 0.076770 | 0.086708 | 0.103839 | -0.118597 | -0.804793 | 0.097293 | 0.205115 | 0.893869 | 1.000000 | 0.235874 |
| class | -0.033796 | -0.160546 | -0.065209 | -0.182921 | -0.098431 | 0.207619 | -0.288904 | 0.339348 | -0.259102 | -0.032399 | -0.312836 | -0.288146 | -0.250967 | -0.213049 | 0.119652 | -0.010674 | 0.067251 | 0.235874 | 1.000000 |
sns.heatmap(cor,annot=True,annot_kws={"size": 1.0},cmap='BuPu',vmin=-1,vmax=1)
<matplotlib.axes._subplots.AxesSubplot at 0x1e036d70ac8>
train = veh
null_columns=train.columns[train.isnull().any()]
train[null_columns].isnull().sum()
compactness 1 circularity 6 distance_circularity 5 radius_ratio 7 pr.axis_aspect_ratio 3 max.length_aspect_ratio 1 scatter_ratio 2 elongatedness 2 pr.axis_rectangularity 4 max.length_rectangularity 1 scaled_variance 4 scaled_variance.1 3 scaled_radius_of_gyration 3 scaled_radius_of_gyration.1 5 skewness_about 7 skewness_about.1 2 skewness_about.2 2 hollows_ratio 1 class 1 dtype: int64
print(train[train.isnull().any(axis=1)][null_columns].head())
compactness circularity distance_circularity radius_ratio \
5 107.0 NaN 106.0 172.0
9 93.0 44.0 98.0 NaN
19 101.0 56.0 100.0 215.0
35 100.0 46.0 NaN 172.0
66 81.0 43.0 68.0 125.0
pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio \
5 50.0 6.0 255.0
9 62.0 11.0 183.0
19 NaN 10.0 208.0
35 67.0 9.0 157.0
66 57.0 8.0 149.0
elongatedness pr.axis_rectangularity max.length_rectangularity \
5 26.0 28.0 169.0
9 36.0 22.0 146.0
19 32.0 24.0 169.0
35 43.0 20.0 150.0
66 46.0 19.0 146.0
scaled_variance scaled_variance.1 scaled_radius_of_gyration \
5 280.0 957.0 264.0
9 202.0 505.0 152.0
19 227.0 651.0 223.0
35 170.0 363.0 184.0
66 169.0 323.0 172.0
scaled_radius_of_gyration.1 skewness_about skewness_about.1 \
5 85.0 5.0 9.0
9 64.0 4.0 14.0
19 74.0 6.0 5.0
35 67.0 17.0 7.0
66 NaN NaN 18.0
skewness_about.2 hollows_ratio class
5 181.0 183.0 bus
9 195.0 204.0 car
19 186.0 193.0 car
35 192.0 200.0 van
66 179.0 184.0 bus
veh1=veh.fillna(veh.mean())
train = veh1
null_columns=train.columns[train.isnull().any()]
train[null_columns].isnull().sum()
class 1 dtype: int64
print(train[train.isnull().any(axis=1)][null_columns].head())
class 846 NaN
veh1=veh1.dropna()
veh.shape
(847, 19)
veh1.head(10)
| compactness | circularity | distance_circularity | radius_ratio | pr.axis_aspect_ratio | max.length_aspect_ratio | scatter_ratio | elongatedness | pr.axis_rectangularity | max.length_rectangularity | scaled_variance | scaled_variance.1 | scaled_radius_of_gyration | scaled_radius_of_gyration.1 | skewness_about | skewness_about.1 | skewness_about.2 | hollows_ratio | class | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 95.0 | 48.000000 | 83.0 | 178.000000 | 72.0 | 10.0 | 162.0 | 42.0 | 20.0 | 159.0 | 176.0 | 379.0 | 184.0 | 70.0 | 6.0 | 16.0 | 187.0 | 197.0 | van |
| 1 | 91.0 | 41.000000 | 84.0 | 141.000000 | 57.0 | 9.0 | 149.0 | 45.0 | 19.0 | 143.0 | 170.0 | 330.0 | 158.0 | 72.0 | 9.0 | 14.0 | 189.0 | 199.0 | van |
| 2 | 104.0 | 50.000000 | 106.0 | 209.000000 | 66.0 | 10.0 | 207.0 | 32.0 | 23.0 | 158.0 | 223.0 | 635.0 | 220.0 | 73.0 | 14.0 | 9.0 | 188.0 | 196.0 | car |
| 3 | 93.0 | 41.000000 | 82.0 | 159.000000 | 63.0 | 9.0 | 144.0 | 46.0 | 19.0 | 143.0 | 160.0 | 309.0 | 127.0 | 63.0 | 6.0 | 10.0 | 199.0 | 207.0 | van |
| 4 | 85.0 | 44.000000 | 70.0 | 205.000000 | 103.0 | 52.0 | 149.0 | 45.0 | 19.0 | 144.0 | 241.0 | 325.0 | 188.0 | 127.0 | 9.0 | 11.0 | 180.0 | 183.0 | bus |
| 5 | 107.0 | 44.828775 | 106.0 | 172.000000 | 50.0 | 6.0 | 255.0 | 26.0 | 28.0 | 169.0 | 280.0 | 957.0 | 264.0 | 85.0 | 5.0 | 9.0 | 181.0 | 183.0 | bus |
| 6 | 97.0 | 43.000000 | 73.0 | 173.000000 | 65.0 | 6.0 | 153.0 | 42.0 | 19.0 | 143.0 | 176.0 | 361.0 | 172.0 | 66.0 | 13.0 | 1.0 | 200.0 | 204.0 | bus |
| 7 | 90.0 | 43.000000 | 66.0 | 157.000000 | 65.0 | 9.0 | 137.0 | 48.0 | 18.0 | 146.0 | 162.0 | 281.0 | 164.0 | 67.0 | 3.0 | 3.0 | 193.0 | 202.0 | van |
| 8 | 86.0 | 34.000000 | 62.0 | 140.000000 | 61.0 | 7.0 | 122.0 | 54.0 | 17.0 | 127.0 | 141.0 | 223.0 | 112.0 | 64.0 | 2.0 | 14.0 | 200.0 | 208.0 | van |
| 9 | 93.0 | 44.000000 | 98.0 | 168.888095 | 62.0 | 11.0 | 183.0 | 36.0 | 22.0 | 146.0 | 202.0 | 505.0 | 152.0 | 64.0 | 4.0 | 14.0 | 195.0 | 204.0 | car |
Q1 = veh.quantile(0.25)
Q3 = veh.quantile(0.75)
IQR = Q3 - Q1
print(IQR)
compactness 13.00 circularity 9.00 distance_circularity 28.00 radius_ratio 54.00 pr.axis_aspect_ratio 8.00 max.length_aspect_ratio 3.00 scatter_ratio 51.00 elongatedness 13.00 pr.axis_rectangularity 4.00 max.length_rectangularity 22.00 scaled_variance 50.00 scaled_variance.1 269.00 scaled_radius_of_gyration 49.00 scaled_radius_of_gyration.1 8.00 skewness_about 7.00 skewness_about.1 14.00 skewness_about.2 9.00 hollows_ratio 10.75 dtype: float64
print(veh1 < (Q1 - 1.5 * IQR))
(veh1 > (Q3 + 1.5 * IQR))
circularity class compactness distance_circularity elongatedness \
0 False False False False False
1 False False False False False
2 False False False False False
3 False False False False False
4 False False False False False
5 False False False False False
6 False False False False False
7 False False False False False
8 False False False False False
9 False False False False False
10 False False False False False
11 False False False False False
12 False False False False False
13 False False False False False
14 False False False False False
15 False False False False False
16 False False False False False
17 False False False False False
18 False False False False False
19 False False False False False
20 False False False False False
21 False False False False False
22 False False False False False
23 False False False False False
24 False False False False False
25 False False False False False
26 False False False False False
27 False False False False False
28 False False False False False
29 False False False False False
.. ... ... ... ... ...
816 False False False False False
817 False False False False False
818 False False False False False
819 False False False False False
820 False False False False False
821 False False False False False
822 False False False False False
823 False False False False False
824 False False False False False
825 False False False False False
826 False False False False False
827 False False False False False
828 False False False False False
829 False False False False False
830 False False False False False
831 False False False False False
832 False False False False False
833 False False False False False
834 False False False False False
835 False False False False False
836 False False False False False
837 False False False False False
838 False False False False False
839 False False False False False
840 False False False False False
841 False False False False False
842 False False False False False
843 False False False False False
844 False False False False False
845 False False False False False
hollows_ratio max.length_aspect_ratio max.length_rectangularity \
0 False False False
1 False False False
2 False False False
3 False False False
4 False False False
5 False False False
6 False False False
7 False False False
8 False False False
9 False False False
10 False False False
11 False False False
12 False False False
13 False False False
14 False False False
15 False False False
16 False False False
17 False False False
18 False False False
19 False False False
20 False False False
21 False False False
22 False False False
23 False False False
24 False False False
25 False False False
26 False False False
27 False False False
28 False False False
29 False False False
.. ... ... ...
816 False False False
817 False False False
818 False False False
819 False False False
820 False False False
821 False False False
822 False False False
823 False False False
824 False False False
825 False False False
826 False False False
827 False False False
828 False False False
829 False False False
830 False False False
831 False False False
832 False False False
833 False False False
834 False False False
835 False False False
836 False False False
837 False False False
838 False False False
839 False False False
840 False False False
841 False False False
842 False False False
843 False False False
844 False False False
845 False False False
pr.axis_aspect_ratio pr.axis_rectangularity radius_ratio \
0 False False False
1 False False False
2 False False False
3 False False False
4 False False False
5 False False False
6 False False False
7 False False False
8 False False False
9 False False False
10 False False False
11 False False False
12 False False False
13 False False False
14 False False False
15 False False False
16 False False False
17 False False False
18 False False False
19 False False False
20 False False False
21 False False False
22 False False False
23 False False False
24 False False False
25 False False False
26 False False False
27 False False False
28 False False False
29 False False False
.. ... ... ...
816 False False False
817 False False False
818 False False False
819 False False False
820 False False False
821 False False False
822 False False False
823 False False False
824 False False False
825 False False False
826 False False False
827 False False False
828 False False False
829 False False False
830 False False False
831 False False False
832 False False False
833 False False False
834 False False False
835 False False False
836 False False False
837 False False False
838 False False False
839 False False False
840 False False False
841 False False False
842 False False False
843 False False False
844 False False False
845 False False False
scaled_radius_of_gyration scaled_radius_of_gyration.1 scaled_variance \
0 False False False
1 False False False
2 False False False
3 False False False
4 False False False
5 False False False
6 False False False
7 False False False
8 False False False
9 False False False
10 False False False
11 False False False
12 False False False
13 False False False
14 False False False
15 False False False
16 False False False
17 False False False
18 False False False
19 False False False
20 False False False
21 False False False
22 False False False
23 False False False
24 False False False
25 False False False
26 False False False
27 False False False
28 False False False
29 False False False
.. ... ... ...
816 False False False
817 False False False
818 False False False
819 False False False
820 False False False
821 False False False
822 False False False
823 False False False
824 False False False
825 False False False
826 False False False
827 False False False
828 False False False
829 False False False
830 False False False
831 False False False
832 False False False
833 False False False
834 False False False
835 False False False
836 False False False
837 False False False
838 False False False
839 False False False
840 False False False
841 False False False
842 False False False
843 False False False
844 False False False
845 False False False
scaled_variance.1 scatter_ratio skewness_about skewness_about.1 \
0 False False False False
1 False False False False
2 False False False False
3 False False False False
4 False False False False
5 False False False False
6 False False False False
7 False False False False
8 False False False False
9 False False False False
10 False False False False
11 False False False False
12 False False False False
13 False False False False
14 False False False False
15 False False False False
16 False False False False
17 False False False False
18 False False False False
19 False False False False
20 False False False False
21 False False False False
22 False False False False
23 False False False False
24 False False False False
25 False False False False
26 False False False False
27 False False False False
28 False False False False
29 False False False False
.. ... ... ... ...
816 False False False False
817 False False False False
818 False False False False
819 False False False False
820 False False False False
821 False False False False
822 False False False False
823 False False False False
824 False False False False
825 False False False False
826 False False False False
827 False False False False
828 False False False False
829 False False False False
830 False False False False
831 False False False False
832 False False False False
833 False False False False
834 False False False False
835 False False False False
836 False False False False
837 False False False False
838 False False False False
839 False False False False
840 False False False False
841 False False False False
842 False False False False
843 False False False False
844 False False False False
845 False False False False
skewness_about.2
0 False
1 False
2 False
3 False
4 False
5 False
6 False
7 False
8 False
9 False
10 False
11 False
12 False
13 False
14 False
15 False
16 False
17 False
18 False
19 False
20 False
21 False
22 False
23 False
24 False
25 False
26 False
27 False
28 False
29 False
.. ...
816 False
817 False
818 False
819 False
820 False
821 False
822 False
823 False
824 False
825 False
826 False
827 False
828 False
829 False
830 False
831 False
832 False
833 False
834 False
835 False
836 False
837 False
838 False
839 False
840 False
841 False
842 False
843 False
844 False
845 False
[846 rows x 19 columns]
| circularity | class | compactness | distance_circularity | elongatedness | hollows_ratio | max.length_aspect_ratio | max.length_rectangularity | pr.axis_aspect_ratio | pr.axis_rectangularity | radius_ratio | scaled_radius_of_gyration | scaled_radius_of_gyration.1 | scaled_variance | scaled_variance.1 | scatter_ratio | skewness_about | skewness_about.1 | skewness_about.2 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 1 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 2 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 3 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 4 | False | False | False | False | False | False | True | False | True | False | False | False | True | False | False | False | False | False | False |
| 5 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 6 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 7 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 8 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 9 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 10 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 11 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 12 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 13 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 14 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 15 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 16 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 17 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 18 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 19 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 20 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 21 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 22 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 23 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 24 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 25 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 26 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 27 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 28 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 29 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 816 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 817 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 818 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 819 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 820 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 821 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 822 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 823 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 824 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 825 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 826 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 827 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 828 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 829 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 830 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 831 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 832 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 833 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 834 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 835 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | True | False | False | False | False |
| 836 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 837 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 838 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 839 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 840 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 841 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 842 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 843 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 844 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 845 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
846 rows × 19 columns
veh1_out = veh1[~((veh < (Q1 - 1.5 * IQR)) |(veh1 > (Q3 + 1.5 * IQR))).any(axis=1)]
veh1_out.shape
C:\Users\Akash Barwad\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index. """Entry point for launching an IPython kernel.
(813, 19)
veh2=veh1_out
veh2.shape
(813, 19)
veh1.shape
(846, 19)
X = veh2.iloc[:,0:17].values
y = veh2.iloc[:,18].values
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.3, random_state = 10)
X_train = StandardScaler().fit_transform(X_train)
X_test = StandardScaler().fit_transform(X_test)
cov_matrix = np.cov(X_train.T)
print('Covariance Matrix \n%s', cov_matrix)
e_vals, e_vecs = np.linalg.eig(cov_matrix)
print('Eigenvectors \n%s' %e_vecs)
print('\nEigenvalues \n%s' %e_vals)
Covariance Matrix %s [[ 1.00176056 0.67303392 0.79719405 0.73128119 0.14226364 0.4897036 0.81465354 -0.79265308 0.81656737 0.67471764 0.79852657 0.82081443 0.58457404 -0.22514708 0.20633538 0.1376959 0.28394189] [ 0.67303392 1.00176056 0.79669218 0.6273473 0.15330433 0.56075265 0.84708563 -0.81675129 0.84343605 0.96151887 0.80216833 0.83752265 0.92804391 0.0902563 0.16192697 -0.00591522 -0.14006059] [ 0.79719405 0.79669218 1.00176056 0.80121117 0.20216171 0.66252609 0.91277666 -0.91517946 0.90056848 0.78322186 0.88794423 0.89737534 0.71146759 -0.20436425 0.11929571 0.24828495 0.11139934] [ 0.73128119 0.6273473 0.80121117 1.00176056 0.64795189 0.4625814 0.77366716 -0.8352769 0.74652657 0.56538905 0.77419265 0.76197567 0.53703184 -0.39555464 0.0393724 0.14745539 0.41265989] [ 0.14226364 0.15330433 0.20216171 0.64795189 1.00176056 0.12795773 0.14163715 -0.25369252 0.10942912 0.08996209 0.15394392 0.12665473 0.09981121 -0.34707899 -0.09365954 -0.05106815 0.41185533] [ 0.4897036 0.56075265 0.66252609 0.4625814 0.12795773 1.00176056 0.49350662 -0.50157516 0.49276247 0.64732382 0.40420005 0.45577812 0.39579378 -0.30658716 0.10369878 0.13530406 0.04777365] [ 0.81465354 0.84708563 0.91277666 0.77366716 0.14163715 0.49350662 1.00176056 -0.97625965 0.99167835 0.81639187 0.98011702 0.99829371 0.80522309 0.03697039 0.08634074 0.19842877 -0.02688949] [-0.79265308 -0.81675129 -0.91517946 -0.8352769 -0.25369252 -0.50157516 -0.97625965 1.00176056 -0.95216357 -0.77621348 -0.96529805 -0.96072489 -0.76877563 0.05628881 -0.06604732 -0.17390605 -0.07940512] [ 0.81656737 0.84343605 0.90056848 0.74652657 0.10942912 0.49276247 0.99167835 -0.95216357 1.00176056 0.82011071 0.96540125 0.99207234 0.80223464 0.04842655 0.09371673 0.2002221 -0.04647376] [ 0.67471764 0.96151887 0.78322186 0.56538905 0.08996209 0.64732382 0.81639187 -0.77621348 0.82011071 1.00176056 0.75608407 0.8045637 0.87080493 0.07830698 0.1605245 0.01560081 -0.13938041] [ 0.79852657 0.80216833 0.88794423 0.77419265 0.15394392 0.40420005 0.98011702 -0.96529805 0.96540125 0.75608407 1.00176056 0.97967948 0.78872743 0.04994741 0.05986088 0.19665229 -0.00139958] [ 0.82081443 0.83752265 0.89737534 0.76197567 0.12665473 0.45577812 0.99829371 -0.96072489 0.99207234 0.8045637 0.97967948 1.00176056 0.80209956 0.04576427 0.08921677 0.19065291 -0.02210679] [ 0.58457404 0.92804391 0.71146759 0.53703184 0.09981121 0.39579378 0.80522309 -0.76877563 0.80223464 0.87080493 0.78872743 0.80209956 1.00176056 0.24308557 0.18186495 -0.04770615 -0.24973902] [-0.22514708 0.0902563 -0.20436425 -0.39555464 -0.34707899 -0.30658716 0.03697039 0.05628881 0.04842655 0.07830698 0.04994741 0.04576427 0.24308557 1.00176056 -0.07308904 -0.08195419 -0.85140082] [ 0.20633538 0.16192697 0.11929571 0.0393724 -0.09365954 0.10369878 0.08634074 -0.06604732 0.09371673 0.1605245 0.05986088 0.08921677 0.18186495 -0.07308904 1.00176056 -0.05403233 0.09956892] [ 0.1376959 -0.00591522 0.24828495 0.14745539 -0.05106815 0.13530406 0.19842877 -0.17390605 0.2002221 0.01560081 0.19665229 0.19065291 -0.04770615 -0.08195419 -0.05403233 1.00176056 0.04995322] [ 0.28394189 -0.14006059 0.11139934 0.41265989 0.41185533 0.04777365 -0.02688949 -0.07940512 -0.04647376 -0.13938041 -0.00139958 -0.02210679 -0.24973902 -0.85140082 0.09956892 0.04995322 1.00176056]] Eigenvectors [[-2.71334066e-01 1.20206747e-01 4.72154231e-02 1.56869429e-01 1.58475547e-01 2.65690158e-01 1.46818644e-01 7.71084057e-01 -3.69533907e-01 4.21180705e-02 1.09454139e-02 -1.48200492e-03 6.14976978e-02 1.70502068e-03 -1.74269446e-01 -1.39775092e-02 2.13875903e-02] [-2.89453073e-01 -1.32335559e-01 1.88297668e-01 -8.61928654e-02 -1.41514553e-01 -7.56203706e-02 -4.05039921e-01 4.55234192e-02 -1.71499347e-02 -2.37742713e-01 4.46787925e-02 5.64420152e-04 3.97210550e-01 7.36260068e-02 7.77818481e-02 5.28581121e-01 3.95584643e-01] [-3.03078431e-01 6.95116758e-02 -5.16670127e-02 1.10984545e-01 -1.03423332e-01 1.70823710e-02 1.37979823e-01 -2.73653175e-01 -3.10049992e-01 4.29663738e-02 7.92187377e-01 -3.18141636e-03 -1.67763764e-01 -1.24169779e-02 1.19153679e-01 -4.25671379e-02 1.16427516e-01] [-2.63706299e-01 2.91968978e-01 -6.67524803e-02 -2.09077432e-01 8.68730517e-02 -1.42276142e-01 1.55358226e-01 -5.71601300e-02 -1.27533366e-01 -8.36735789e-02 -1.71654390e-01 -2.20712094e-02 4.74830955e-01 2.65488997e-03 5.71407945e-01 -3.59815169e-01 -8.47766265e-02] [-7.37773033e-02 3.67938167e-01 2.29725370e-03 -5.67716703e-01 -3.07225177e-03 -5.73252255e-01 1.03782695e-01 1.80855176e-01 -2.31535187e-03 -4.28430629e-02 6.42184336e-02 1.53979490e-02 -2.73637481e-01 -5.65001751e-03 -2.42000579e-01 1.58376247e-01 -2.26949031e-03] [-1.91234203e-01 9.68387083e-02 1.60851706e-01 2.29558968e-01 -7.37216108e-01 -8.70996667e-02 3.66572597e-01 6.63124514e-02 1.80697116e-01 3.06358154e-01 -1.95832637e-01 -1.41529397e-02 -1.71217086e-02 -8.78861418e-03 7.85951571e-02 1.17677014e-01 1.34698237e-02] [-3.14059300e-01 -5.54204246e-02 -1.02201955e-01 7.48961011e-03 1.04371760e-01 6.87282573e-02 1.04975265e-01 -9.83830054e-02 1.38874968e-01 -9.46739107e-02 -1.33719376e-01 7.90684389e-01 -8.83247139e-02 -3.90127797e-01 -6.87734039e-02 -1.09386405e-02 1.23755686e-01] [ 3.10312677e-01 -1.98298916e-02 9.58321540e-02 5.41173141e-02 -9.63851994e-02 -4.36229356e-02 -1.21773639e-01 2.17406591e-01 -2.68289765e-01 -1.65361829e-01 -2.08532507e-02 2.16251414e-01 -3.80946143e-01 -1.01813325e-01 6.24411197e-01 2.82983096e-01 -2.14323735e-01] [-3.11202402e-01 -6.97107118e-02 -9.59237500e-02 3.04579514e-02 9.79151902e-02 8.30312398e-02 9.55092317e-02 -4.64223785e-02 9.31880281e-02 -2.26122244e-01 -2.41548706e-01 -6.39116137e-03 -4.49147565e-01 6.76425368e-01 1.44240292e-01 -6.88872185e-02 2.39516975e-01] [-2.81592469e-01 -1.34679728e-01 2.00715657e-01 -4.24900544e-03 -2.56065087e-01 -3.70767252e-02 -3.58095026e-01 2.09838319e-01 3.13223856e-01 -3.60480787e-01 1.95863032e-01 -1.58020767e-02 -8.37822979e-02 -3.01827069e-02 -5.76030199e-02 -3.91511099e-01 -4.38014739e-01] [-3.05708718e-01 -4.77536591e-02 -1.41685198e-01 -2.85197784e-02 1.96085454e-01 9.20427282e-02 8.71606302e-02 -1.40979826e-01 6.09093940e-02 1.74861390e-01 1.36289234e-02 3.02141048e-02 1.08659709e-01 2.00403100e-01 -2.30730579e-02 4.98698066e-01 -6.87309593e-01] [-3.11320875e-01 -6.02537289e-02 -1.03359238e-01 7.38624496e-03 1.46124477e-01 9.57476906e-02 7.96132592e-02 -6.27871751e-02 1.02687102e-01 -1.55915806e-01 -1.83821724e-01 -5.70095660e-01 -2.86753869e-01 -5.75915444e-01 1.17926950e-01 1.46366254e-01 7.08557316e-02] [-2.69016134e-01 -2.16682502e-01 1.81864622e-01 -1.39812035e-01 1.70075325e-02 -1.11930092e-01 -4.37269108e-01 -1.12415272e-01 -3.60805608e-01 5.89248604e-01 -2.36427094e-01 8.10031788e-03 -1.97008163e-01 -3.57841834e-02 9.04672147e-03 -1.94389450e-01 -8.83089314e-03] [ 2.06404871e-02 -5.66522966e-01 -1.11528140e-01 -1.75349911e-01 1.73505003e-01 -1.99603915e-01 2.04703903e-01 3.45098649e-01 3.71454709e-01 3.08049985e-01 2.62069939e-01 -3.99176656e-03 5.64946942e-02 9.02679894e-03 2.84166017e-01 -3.15759970e-02 1.33815633e-01] [-4.35973104e-02 8.89043886e-03 6.14681090e-01 4.63293973e-01 4.12625798e-01 -4.38418103e-01 1.69523810e-01 -9.13245419e-02 4.89388310e-02 -4.95654928e-02 -9.74496146e-03 -2.38130513e-03 9.92048861e-03 3.59376434e-03 -1.36512185e-02 1.27234617e-02 -2.41899815e-02] [-5.30383731e-02 7.11267981e-02 -6.32336972e-01 5.11756978e-01 -1.45407103e-02 -4.85843815e-01 -2.78261056e-01 1.11102508e-01 -1.73627773e-02 2.20742869e-02 -4.26012907e-02 -9.46194076e-03 3.17231746e-02 -2.24330447e-03 -2.40955338e-02 -7.81677734e-03 2.79377806e-03] [-1.50704330e-02 5.72531420e-01 7.89911669e-02 7.28168687e-02 1.92644530e-01 2.31930785e-01 -3.31138328e-01 9.34557057e-02 4.84694543e-01 3.46181931e-01 1.51311729e-01 2.69646176e-02 -1.04136875e-01 7.61882691e-03 2.08214508e-01 5.33027921e-02 1.25133232e-01]] Eigenvalues [9.70498964e+00 2.58145261e+00 1.19948501e+00 1.14331327e+00 8.64368272e-01 6.55185311e-01 3.17786946e-01 2.17864457e-01 1.00304855e-01 7.41329330e-02 5.93348695e-02 3.35072789e-04 3.57053557e-02 8.35195971e-03 2.69815257e-02 1.85471577e-02 2.17903391e-02]
tot = sum(e_vals)
var_exp = [( i /tot ) * 100 for i in sorted(e_vals, reverse=True)]
cum_var_exp = np.cumsum(var_exp)
print("Cumulative Variance Explained", cum_var_exp)
Cumulative Variance Explained [ 56.98784364 72.14617179 79.1895656 85.90311813 90.97870149 94.82595939 96.69200907 97.97131241 98.56030401 98.9956137 99.34402893 99.55369131 99.71212723 99.84008043 99.94898962 99.99803245 100. ]
# Ploting
plt.figure(figsize=(10 , 5))
plt.bar(range(1, e_vals.size + 1), var_exp, alpha = 0.5, align = 'center', label = 'Individual explained variance')
plt.step(range(1, e_vals.size + 1), cum_var_exp, where='mid', label = 'Cumulative explained variance')
plt.ylabel('Explained Variance Ratio')
plt.xlabel('Principal Components')
plt.legend(loc = 'best')
plt.tight_layout()
plt.show()
eigen_pairs = [(np.abs(e_vals[i]), e_vecs[:,i]) for i in range(len(e_vals))]
eigen_pairs.sort(reverse=True)
eigen_pairs[:7]
[(9.70498963897293,
array([-0.27133407, -0.28945307, -0.30307843, -0.2637063 , -0.0737773 ,
-0.1912342 , -0.3140593 , 0.31031268, -0.3112024 , -0.28159247,
-0.30570872, -0.31132088, -0.26901613, 0.02064049, -0.04359731,
-0.05303837, -0.01507043])),
(2.5814526100817123,
array([ 0.12020675, -0.13233556, 0.06951168, 0.29196898, 0.36793817,
0.09683871, -0.05542042, -0.01982989, -0.06971071, -0.13467973,
-0.04775366, -0.06025373, -0.2166825 , -0.56652297, 0.00889044,
0.0711268 , 0.57253142])),
(1.1994850051042234,
array([ 0.04721542, 0.18829767, -0.05166701, -0.06675248, 0.00229725,
0.16085171, -0.10220196, 0.09583215, -0.09592375, 0.20071566,
-0.1416852 , -0.10335924, 0.18186462, -0.11152814, 0.61468109,
-0.63233697, 0.07899117])),
(1.1433132684759448,
array([ 0.15686943, -0.08619287, 0.11098454, -0.20907743, -0.5677167 ,
0.22955897, 0.00748961, 0.05411731, 0.03045795, -0.00424901,
-0.02851978, 0.00738624, -0.13981203, -0.17534991, 0.46329397,
0.51175698, 0.07281687])),
(0.8643682716555754,
array([ 0.15847555, -0.14151455, -0.10342333, 0.08687305, -0.00307225,
-0.73721611, 0.10437176, -0.0963852 , 0.09791519, -0.25606509,
0.19608545, 0.14612448, 0.01700753, 0.173505 , 0.4126258 ,
-0.01454071, 0.19264453])),
(0.6551853108346146,
array([ 0.26569016, -0.07562037, 0.01708237, -0.14227614, -0.57325226,
-0.08709967, 0.06872826, -0.04362294, 0.08303124, -0.03707673,
0.09204273, 0.09574769, -0.11193009, -0.19960392, -0.4384181 ,
-0.48584381, 0.23193079])),
(0.3177869464997268,
array([ 0.14681864, -0.40503992, 0.13797982, 0.15535823, 0.10378269,
0.3665726 , 0.10497526, -0.12177364, 0.09550923, -0.35809503,
0.08716063, 0.07961326, -0.43726911, 0.2047039 , 0.16952381,
-0.27826106, -0.33113833]))]
w = np.hstack((eigen_pairs[0][1].reshape(17,1),
eigen_pairs[1][1].reshape(17,1)))
print('Matrix W:\n', w)
X_pca = X_train.dot(w)
Matrix W: [[-0.27133407 0.12020675] [-0.28945307 -0.13233556] [-0.30307843 0.06951168] [-0.2637063 0.29196898] [-0.0737773 0.36793817] [-0.1912342 0.09683871] [-0.3140593 -0.05542042] [ 0.31031268 -0.01982989] [-0.3112024 -0.06971071] [-0.28159247 -0.13467973] [-0.30570872 -0.04775366] [-0.31132088 -0.06025373] [-0.26901613 -0.2166825 ] [ 0.02064049 -0.56652297] [-0.04359731 0.00889044] [-0.05303837 0.0711268 ] [-0.01507043 0.57253142]]
X_train.shape, w.shape, X_pca.shape
((569, 17), (17, 2), (569, 2))
X_pca
array([[-3.17335885, -0.65924239],
[-1.61895414, 0.70052913],
[ 5.05334739, -1.95490136],
...,
[ 0.08009224, 0.78708036],
[ 2.45561652, 1.15162265],
[ 1.71599444, 2.68719083]])
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
clf = SVC()
clf.fit(X_train, y_train)
print ('score', clf.score(X_test, y_test))
score 0.9672131147540983